#delimit ;

cap prog drop t3_ols ;
prog def t3_ols ;
syntax  , [bsreps(integer 3) sourcedata(string) lhs(string) rhs(string) keyrhs(string) beta0(integer 0) yearonly] ;

/*
t3_ols: runs the OLS regressions of Models 1 and 2 in Table 3 of Cameron & Miller, JHR, 2014(?).

It transforms the LHS and RHS variables into "deviations from state and year fixed effects", and runs the
regressions on these transformed variables.  The option "yearonly" is passed through to make_deviations and
table3_np_boot; it is intended for Model 2, where the state fixed effects are not incorporated.

Options:
	bsreps(#)       number of bootstrap replications (pairs and wild); default 3
	sourcedata()    dataset to load; it must contain statefip and year
	lhs()           name of the outcome variable
	rhs()           name of the (single) regressor
	keyrhs()        name of the regressor of interest, passed on to table3_np_boot and used in the output header
	beta0(#)        integer value of the coefficient under the null hypothesis; default 0
	yearonly        see above

Side effects: replaces the data in memory, and writes the results table to the named log "t3out"
(the log must already be open under that name).

Relies on programs defined elsewhere: make_deviations, cgmreg, CR23_IK_CSS and table3_np_boot.
*/


qui use "`sourcedata'" , replace ;
tempvar resid lhs_null ;
tempfile main_data ;

local beta_hypothesis = `beta0' ;

local reglhs = "`lhs'_deviation" ;
local regrhs = "`rhs'_deviation" ;

/* right now, this code only allows for one RHS variable.  to get it ready for more, we would need to 
create deviations for each RHS variable, and create a local macro that contained the names of the "deviations" RHS variables */

/* create "deviations from state and year fixed effects" for the LHS and RHS variables */
make_deviations `lhs' `rhs' , `yearonly' ;

/* now do state and year FE models.  */
/* Row 1, default standard errors */
reg `reglhs' `regrhs' , vce(ols) noconstant ;
local b_fe = _b[`regrhs'] ;
local se_fe_def = _se[`regrhs'] ;
local N_fe = e(N) ;
local K_fe = e(rank) ;
predict `resid' , resid ;
cap drop resid ;
gen resid = `resid' ;

/* the wild bootstraps need residuals and predicted values from "imposing the null hypothesis", aka restricted regression */
/* construct "restricted regression" residuals and predicted values */
/* the null-imposed outcome is built in a temporary double variable, so that the LHS variable itself is never
modified (subtracting and re-adding beta0 * rhs in place can change the stored values through rounding).
the restricted regression is written out explicitly rather than derived by string substitution on a command
macro, which breaks when one deviation name is a substring of the other. */
qui gen double `lhs_null' = `reglhs' - `beta_hypothesis' * `regrhs' ;
qui reg `lhs_null' , vce(ols) noconstant ;
predict resid_restricted , residual ;
predict yhat_restricted , xb ;
qui replace yhat_restricted = yhat_restricted + (`beta_hypothesis' * `regrhs' ) ;
/* the helper variable is dropped so that it does not get saved with the main data */
drop `lhs_null' ;

/* Row 2, White robust */
reg `reglhs' `regrhs' , vce(robust) noconstant ;
local se_fe_rob = _se[`regrhs'] ;

/* Row 3, Cluster on state */
reg `reglhs' `regrhs' , vce(cluster statefip) noconstant ;
local se_fe_clu = _se[`regrhs'] ;
local G_fe = e(N_clust) ;
tempname betavec ;
matrix `betavec' = e(b) ;

sort statefip ;
qui save `main_data' , replace ;

/* 
Bonus estimation (not in Table 3): two-way clustering on state and year.
Implemented using ivreg2.  This can be downloaded (as of 2014-07-06) from:
http://ideas.repec.org/c/boc/bocode/s425401.html
NOTE(review): the note above refers to ivreg2, but the commands below call cgmreg - confirm which package is required.
*/
cgmreg `reglhs' `regrhs' , cluster(statefip) noconstant ;
cgmreg `reglhs' `regrhs' , cluster(statefip year) noconstant ;



/***** get the CR2 correction, and the IK and CSS degrees of freedom *****/
/* 
2/19/2013 - for now, rename statefip "pseudo_cluster" to fit with CR23 code.  but, need to change code to accept 
an option that gives the cluster ID name 
also add option to turn off CR3, add option to turn off IK, turn off CSS, etc.  Maybe each one of the 4?
*/
rename statefip pseudo_cluster ;
/* Rows 4 and 5 */
CR23_IK_CSS , betavec(`betavec') lhs(`reglhs') rhs(`regrhs') key_rhs(`regrhs') 
	noconstant(1) main_data("`main_data'") ;

/* copy the returned results into locals of the same name */
foreach r in se_CR2 se_CR3 dof_CR2_IK dof_CSS { ;
	local `r' = r(`r') ;
} ;
local se_fe_CR2 = `se_CR2' ;
local DOF_fe_IK =`dof_CR2_IK' ;
local DOF_fe_CSS =`dof_CSS' ;


local time1 = "$S_TIME" ;
/******* use pairs (nonparametric) bootstrap for standard errors ******/
tempfile pairs_bs_out ;
cap erase `pairs_bs_out' ;

/* main_data was saved before the rename above, so it still carries statefip */
use statefip year `lhs'* `keyrhs'* using `main_data' , replace ;

/* calls program "table3_np_boot", which is defined in t34_programs.do.  That program re-does the "deviations from means" and then 
re-runs the regression, to get the betas and (clustered) standard errors. */
qui bootstrap b=r(mybeta) se=r(myse) , reps(`bsreps') nodots cluster(statefip) idcluster(pseudo_cluster_2) 
	seed(10101) saving(`pairs_bs_out' , double)
	: table3_np_boot , lhs(`lhs') rhs(`rhs') keyrhs(`keyrhs') clusterid(pseudo_cluster_2) `yearonly' ;
local bs_misreps = e(N_misreps) ;

use `pairs_bs_out' , replace ;
/* the pairs-bootstrap standard error is the standard deviation of the replicate betas */
qui summ b ;
local se_fe_clu_bs = r(sd) ;

/* pairs-bootstrap t-statistics are centered on the original-sample estimate */
keep b se ;
gen t_pairs_bs = ((b - `b_fe') / se) ;
keep t_pairs_bs ;
summ ;
save `pairs_bs_out' , replace ;

local time2 = "$S_TIME" ;

/* Wild cluster bootstrap code below for Rows 8-9 */
/* use various BS for percentile T p-values */

local main_t = (`b_fe' - `beta_hypothesis') / `se_fe_clu' ;

/* build the list of clusters (one row per statefip) that receives the wild weights */
qui use statefip using `main_data' , clear ;
contract statefip ;
drop _freq ;
tempfile to_be_sampled ;
sort statefip ;
qui save `to_be_sampled' , replace ;

tempfile bsout ;
cap postclose bs_output ;
cap erase `bsout' ;
qui postfile bs_output t_rad_res t_webb_res using `bsout' ;

set seed 10101 ;
qui forvalues bb = 1/`bsreps' { ;

	/* for the wild bootstrap */
	/* take the cluster list, generate 2 sets of residual transformations - one for Rademacher (row 8 of Table 3), one for Webb (row 9 of Table 3) */
	/* then merge these back onto main dataset, create transformed residuals and then transformed y-hats */
	/* then estimate the models, and save the t-statistics */

	use statefip using `to_be_sampled' , replace ;
	gen my_uniform = uniform() ;
	gen wild_rademacher = -1 + 2 * (my_uniform >= 0.5) ;
	/* six-point Webb weights; the first interval is closed at zero so that a draw of exactly 0 still gets a
	nonzero weight */
	gen wild_webb = 	(-1) * sqrt(1.5) * (my_uniform <= (1/6)) +  
						(-1) * sqrt(1) * (my_uniform > (1/6) & my_uniform <= (2/6))  + 
						(-1) * sqrt(0.5) * (my_uniform > (2/6) & my_uniform <= (3/6)) + 
						(+1) * sqrt(0.5) * (my_uniform > (3/6) & my_uniform <= (4/6)) + 
						(+1) * sqrt(1) * (my_uniform > (4/6) & my_uniform <= (5/6))  + 
						(+1) * sqrt(1.5) * (my_uniform > (5/6) & my_uniform <= (6/6)) ; 
	
	keep statefip wild_rademacher wild_webb ;
	sort statefip ;
	merge 1:m statefip using `main_data' , assert(match) keep(match) nogenerate ;	

	
	/* create transformed residuals and new wild-outcome-variables */
	gen resid_wild_rad_restricted = resid_restricted * wild_rademacher ;
	gen resid_wild_webb_restricted = resid_restricted * wild_webb ;
	
	gen y_wild_rademacher_restricted = yhat_restricted + resid_wild_rad_restricted ;
	gen y_wild_webb_restricted = yhat_restricted + resid_wild_webb_restricted ;

	/* now estimate cluster-robust models on each of these, generating t-statistics.
		For the restricted model, the t-stat is based on the null hypothesis.
		The replicate t-statistics are compared with main_t, which uses the cluster-robust standard error, so
		the replicate regressions must be cluster-robust as well (they previously used vce(ols)).
		The coefficient is read by the regressor's own name, as in the regressions above. */
	
	reg y_wild_rademacher_restricted `regrhs' , vce(cluster statefip) noconstant ;
	local b_wild_rademacher_restricted = _b[`regrhs'] ;
	local se_wild_rademacher_restricted = _se[`regrhs'] ;

	reg y_wild_webb_restricted `regrhs' , vce(cluster statefip) noconstant ;
	local b_wild_webb_restricted = _b[`regrhs'] ;
	local se_wild_webb_restricted = _se[`regrhs'] ;

	/* make the t-stats ; store away into a postfile */

	local t_wild_rademacher_restricted  = (`b_wild_rademacher_restricted' - `beta_hypothesis') 
		/ `se_wild_rademacher_restricted' ;
	local t_wild_webb_restricted  = (`b_wild_webb_restricted' - `beta_hypothesis') 
		/ `se_wild_webb_restricted' ;

	post bs_output (`t_wild_rademacher_restricted') (`t_wild_webb_restricted') ;
	
} ;

qui postclose bs_output ;

local time3 = "$S_TIME" ;

/* identify percentiles in the bootstrap distribution, to get p-values and/or rejection rates */

/* to get these percentiles, we will make lists of t-stats, with the "main" t-stat stuck into this list
we want to know what the p-value of this main t-stat is.  Because with wild, few clusters, we can get intervals,
we will use mean-p-value [is this what we want?] of the interval.   */

/* put the wild and pairs t-statistics side by side, one row per replication */
drop _all ;
qui use `bsout' ;
qui merge 1:1 _n using `pairs_bs_out' , nogenerate ;
summ ;
qui save `bsout' , replace ;

/* one-row dataset holding the main t-stat, to which the bootstrap t-stats are appended */
/* t_rad_unres has no bootstrap counterpart in this program; it stays missing for the appended rows */
drop _all ;
set obs 1 ;
gen t_rad_res = `main_t' ;
gen t_rad_unres = `main_t' ;
gen t_webb_res = `main_t' ;
gen t_pairs_bs = `main_t' ;

*summ ;
append using `bsout' ;
gen rank = . ;
*summ ;

foreach var in pairs_bs rad_res webb_res { ;

	sort t_`var' ;
	qui replace rank = _n ;
	qui summ t_`var' ;
	local maxrank = r(N) ;    /* nonmissing count, including the main t-stat row; this allows for missing bs values */
	qui summ rank if abs(t_`var' - `main_t') < 0.0001 ; /* allow for machine error in t-stat computations in bootstrap step */
	local meanrank = r(mean) ;
	local pctile = `meanrank' / `maxrank' ;
	/* two-sided p-value from the position of the main t-stat in the sorted list */
	local myp = 2 * min(`pctile' , (1-`pctile')) ;
	local p_`var' = `myp' ;
	
	*di "p_`var' = `p_`var''" ;
	
} ;

local dof_nmk = `N_fe' - `K_fe' ;
local dof_gm1 = `G_fe' - 1 ;

/* analytic two-sided p-values: T(N-K) for default and robust, T(G-1) for the cluster-based standard errors,
and T(IK degrees of freedom) for CR2 */
*set trace on ;
local p_def_Tnk = 2 * ttail(`dof_nmk',(abs(`b_fe' - `beta_hypothesis') / `se_fe_def')) ;
local p_rob_Tnk = 2 * ttail(`dof_nmk',(abs(`b_fe' - `beta_hypothesis') / `se_fe_rob')) ;
local p_clu_Tg1 = 2 * ttail(`dof_gm1',(abs(`b_fe' - `beta_hypothesis') / `se_fe_clu')) ;
local p_CR2_Tg1 = 2 * ttail(`dof_gm1',(abs(`b_fe' - `beta_hypothesis') / `se_fe_CR2')) ;
local p_CR2_Tik = 2 * ttail(`DOF_fe_IK',(abs(`b_fe' - `beta_hypothesis') / `se_fe_CR2')) ;
local p_bsse_Tg1 = 2 * ttail(`dof_gm1',(abs(`b_fe' - `beta_hypothesis') / `se_fe_clu_bs')) ;
*set trace off ;


/* write the comma-separated results table to the named log t3out */
qui log on t3out  ;
di ; 
di ;
di "Fixed Effects, `lhs', `keyrhs'" ;
di ;
di "beta,  " 				_column(20) %8.4f `b_fe' ;
di ;
di ;
di "se default,"			_column(20) %8.4f `se_fe_def' "," 		_column(35) %6.4f `p_def_Tnk' ;
di "se rob,"				_column(20) %8.4f `se_fe_rob' "," 		_column(35) %6.4f `p_rob_Tnk'  ;
di "se clu,"				_column(20) %8.4f `se_fe_clu' "," 		_column(35) %6.4f `p_clu_Tg1'  ;
di "se CR2,"				_column(20) %8.4f `se_fe_CR2' "," 		_column(35) %6.4f `p_CR2_Tg1'  ; 
di "se CR2 wIK,"			_column(20) %8.4f `se_fe_CR2' "," 		_column(35) %6.4f `p_CR2_Tik'  ; 
di "se pairs BS,"			_column(20) %8.4f `se_fe_clu_bs' "," 	_column(35) %6.4f `p_bsse_Tg1'  ; 
di ;
di ;
di "percentile T pairs, ,"											_column(35) %6.4f `p_pairs_bs' ;
di "percentile T Rad2, ,"											_column(35) %6.4f `p_rad_res' ;
di "percentile T Webb6, ,"											_column(35) %6.4f `p_webb_res' ;
di ;
di "I-K DOF,"				_column(20) %8.0f `DOF_fe_IK' ;
di "CSS effctv clustrs,"	_column(20) %8.0f `DOF_fe_CSS' ;
di "Num obs,"				_column(20) %8.0f `N_fe' ;
di "Num clusters,"			_column(20) %8.0f `G_fe' ;
qui log off t3out ;

/* timestamps taken before the pairs bootstrap, before the wild bootstrap, and after it */
di "`time1'" ;
di "`time2'" ;
di "`time3'" ;

end ;


